{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# NASA Spacecraft Telemetry Data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [],
   "source": [
    "import json\n",
    "import os\n",
    "from pathlib import Path\n",
    "from typing import Final, List, Callable, Tuple\n",
    "\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "from timeeval import Datasets, DatasetRecord\n",
    "from timeeval.datasets import DatasetAnalyzer\n",
    "\n",
    "from config import data_raw_folder, data_processed_folder"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Looking for source datasets in /home/projects/akita/data/benchmark-data/data-raw/NASA Spacecraft Telemetry Data and\n",
      "saving processed datasets in /home/projects/akita/data/benchmark-data/data-processed\n"
     ]
    }
   ],
   "source": [
    "# Prefix used when naming the dataset collections produced by this notebook.\n",
    "dataset_collection_prefix = \"NASA\"\n",
    "# Raw input lives in a collection-specific subfolder of the configured raw-data folder.\n",
    "source_folder = os.path.join(data_raw_folder, \"NASA Spacecraft Telemetry Data\")\n",
    "# Processed output goes directly into the configured processed-data folder.\n",
    "target_folder = data_processed_folder\n",
    "\n",
    "# Print absolute paths so a misconfigured folder is visible before anything is written.\n",
    "print(f\"Looking for source datasets in {Path(source_folder).absolute()} and\\nsaving processed datasets in {Path(target_folder).absolute()}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [],
   "source": [
    "def create_target_subfolder(input_type: str, dataset_collection_name: str) -> Tuple[str, str]:\n",
    "    \"\"\"Make sure the output folder for one dataset collection exists.\n",
    "\n",
    "    The folder is ``<target_folder>/<input_type>/<dataset_collection_name>``, where\n",
    "    ``target_folder`` is the notebook-level variable. Missing parent directories\n",
    "    are created as well, and a status line is printed either way.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    input_type : str\n",
    "        First path component below ``target_folder``.\n",
    "    dataset_collection_name : str\n",
    "        Second path component below ``target_folder``.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    Tuple[str, str]\n",
    "        ``(dataset_subfolder, target_subfolder)``: the path relative to\n",
    "        ``target_folder`` and that same path joined onto ``target_folder``.\n",
    "\n",
    "    Raises\n",
    "    ------\n",
    "    FileExistsError\n",
    "        If the target path already exists but is not a directory.\n",
    "    \"\"\"\n",
    "    dataset_subfolder = os.path.join(input_type, dataset_collection_name)\n",
    "    target_subfolder = os.path.join(target_folder, dataset_subfolder)\n",
    "    try:\n",
    "        os.makedirs(target_subfolder)\n",
    "        print(f\"Created directories {target_subfolder}\")\n",
    "    except FileExistsError:\n",
    "        # os.makedirs raises the same error when the path is an existing regular\n",
    "        # file; that is a real problem and must not be reported as \"already exist\".\n",
    "        if not os.path.isdir(target_subfolder):\n",
    "            raise\n",
    "        print(f\"Directories {target_subfolder} already exist\")\n",
    "\n",
    "    return dataset_subfolder, target_subfolder\n",
    "\n",
    "def transform_and_label(source: str, target: str, anomaly_windows: List[List[int]], force_all_normal: bool = False) -> pd.DataFrame:\n",
    "    \"\"\"Convert one ``.npy`` file into a labelled CSV time series.\n",
    "\n",
    "    Only column 0 of the array loaded from ``source`` is kept (as ``value``). The\n",
    "    row position is stored as ``timestamp`` and ``is_anomaly`` is set to 1 for\n",
    "    every row that falls inside one of the anomaly windows, 0 otherwise.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    source : str\n",
    "        Path of the ``.npy`` file to read; the array must have at least two dimensions.\n",
    "    target : str\n",
    "        Path of the CSV file to write (without the DataFrame index).\n",
    "    anomaly_windows : List[List[int]]\n",
    "        Iterable of ``(start, end)`` pairs of row positions; both bounds are inclusive.\n",
    "    force_all_normal : bool, default False\n",
    "        If True, ``anomaly_windows`` is ignored and every row is labelled 0.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    pd.DataFrame\n",
    "        The frame that was written, with columns ``timestamp``, ``value``, ``is_anomaly``.\n",
    "    \"\"\"\n",
    "    df = pd.DataFrame(np.load(source)[:, 0], columns=[\"value\"])\n",
    "    df.insert(0, \"timestamp\", df.index.values)\n",
    "    df[\"is_anomaly\"] = 0\n",
    "\n",
    "    if not force_all_normal:\n",
    "        for t1, t2 in anomaly_windows:\n",
    "            # Assign through .loc with a boolean mask: writing into\n",
    "            # df[\"is_anomaly\"].values fails under pandas Copy-on-Write, where\n",
    "            # that array is read-only.\n",
    "            in_window = (df.index >= t1) & (df.index <= t2)\n",
    "            df.loc[in_window, \"is_anomaly\"] = 1\n",
    "\n",
    "    df.to_csv(target, index=False)\n",
    "    return df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Metadata values reused for every dataset handled in this notebook.\n",
    "train_is_normal = True\n",
    "train_type = \"semi-supervised\"\n",
    "datetime_index = False\n",
    "input_type = \"univariate\"\n",
    "dataset_type = \"real\"\n",
    "\n",
    "# `Datasets` instance created on the processed-data folder (`target_folder`).\n",
    "dm = Datasets(target_folder)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Directories /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP already exist\n",
      "Processed source dataset /home/projects/akita/data/benchmark-data/data-raw/NASA Spacecraft Telemetry Data/train/P-1.npy -> /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP/P-1.train.csv\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[('NASA-SMAP', 'P-1') (test)] /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP/P-1.metadata.json already exists, but 'overwrite' was specified! Ignoring existing contents.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Processed source dataset /home/projects/akita/data/benchmark-data/data-raw/NASA Spacecraft Telemetry Data/test/P-1.npy -> /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP/P-1.test.csv\n",
      "Directories /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP already exist\n",
      "Processed source dataset /home/projects/akita/data/benchmark-data/data-raw/NASA Spacecraft Telemetry Data/train/S-1.npy -> /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP/S-1.train.csv\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[('NASA-SMAP', 'S-1') (test)] /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP/S-1.metadata.json already exists, but 'overwrite' was specified! Ignoring existing contents.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Processed source dataset /home/projects/akita/data/benchmark-data/data-raw/NASA Spacecraft Telemetry Data/test/S-1.npy -> /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP/S-1.test.csv\n",
      "Directories /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP already exist\n",
      "Processed source dataset /home/projects/akita/data/benchmark-data/data-raw/NASA Spacecraft Telemetry Data/train/E-1.npy -> /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP/E-1.train.csv\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[('NASA-SMAP', 'E-1') (test)] /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP/E-1.metadata.json already exists, but 'overwrite' was specified! Ignoring existing contents.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Processed source dataset /home/projects/akita/data/benchmark-data/data-raw/NASA Spacecraft Telemetry Data/test/E-1.npy -> /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP/E-1.test.csv\n",
      "Directories /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP already exist\n",
      "Processed source dataset /home/projects/akita/data/benchmark-data/data-raw/NASA Spacecraft Telemetry Data/train/E-2.npy -> /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP/E-2.train.csv\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[('NASA-SMAP', 'E-2') (test)] /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP/E-2.metadata.json already exists, but 'overwrite' was specified! Ignoring existing contents.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Processed source dataset /home/projects/akita/data/benchmark-data/data-raw/NASA Spacecraft Telemetry Data/test/E-2.npy -> /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP/E-2.test.csv\n",
      "Directories /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP already exist\n",
      "Processed source dataset /home/projects/akita/data/benchmark-data/data-raw/NASA Spacecraft Telemetry Data/train/E-3.npy -> /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP/E-3.train.csv\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[('NASA-SMAP', 'E-3') (test)] /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP/E-3.metadata.json already exists, but 'overwrite' was specified! Ignoring existing contents.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Processed source dataset /home/projects/akita/data/benchmark-data/data-raw/NASA Spacecraft Telemetry Data/test/E-3.npy -> /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP/E-3.test.csv\n",
      "Directories /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP already exist\n",
      "Processed source dataset /home/projects/akita/data/benchmark-data/data-raw/NASA Spacecraft Telemetry Data/train/E-4.npy -> /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP/E-4.train.csv\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[('NASA-SMAP', 'E-4') (test)] /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP/E-4.metadata.json already exists, but 'overwrite' was specified! Ignoring existing contents.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Processed source dataset /home/projects/akita/data/benchmark-data/data-raw/NASA Spacecraft Telemetry Data/test/E-4.npy -> /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP/E-4.test.csv\n",
      "Directories /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP already exist\n",
      "Processed source dataset /home/projects/akita/data/benchmark-data/data-raw/NASA Spacecraft Telemetry Data/train/E-5.npy -> /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP/E-5.train.csv\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[('NASA-SMAP', 'E-5') (test)] /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP/E-5.metadata.json already exists, but 'overwrite' was specified! Ignoring existing contents.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Processed source dataset /home/projects/akita/data/benchmark-data/data-raw/NASA Spacecraft Telemetry Data/test/E-5.npy -> /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP/E-5.test.csv\n",
      "Directories /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP already exist\n",
      "Processed source dataset /home/projects/akita/data/benchmark-data/data-raw/NASA Spacecraft Telemetry Data/train/E-6.npy -> /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP/E-6.train.csv\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[('NASA-SMAP', 'E-6') (test)] /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP/E-6.metadata.json already exists, but 'overwrite' was specified! Ignoring existing contents.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Processed source dataset /home/projects/akita/data/benchmark-data/data-raw/NASA Spacecraft Telemetry Data/test/E-6.npy -> /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP/E-6.test.csv\n",
      "Directories /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP already exist\n",
      "Processed source dataset /home/projects/akita/data/benchmark-data/data-raw/NASA Spacecraft Telemetry Data/train/E-7.npy -> /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP/E-7.train.csv\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[('NASA-SMAP', 'E-7') (test)] /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP/E-7.metadata.json already exists, but 'overwrite' was specified! Ignoring existing contents.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Processed source dataset /home/projects/akita/data/benchmark-data/data-raw/NASA Spacecraft Telemetry Data/test/E-7.npy -> /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP/E-7.test.csv\n",
      "Directories /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP already exist\n",
      "Processed source dataset /home/projects/akita/data/benchmark-data/data-raw/NASA Spacecraft Telemetry Data/train/E-8.npy -> /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP/E-8.train.csv\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[('NASA-SMAP', 'E-8') (test)] /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP/E-8.metadata.json already exists, but 'overwrite' was specified! Ignoring existing contents.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Processed source dataset /home/projects/akita/data/benchmark-data/data-raw/NASA Spacecraft Telemetry Data/test/E-8.npy -> /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP/E-8.test.csv\n",
      "Directories /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP already exist\n",
      "Processed source dataset /home/projects/akita/data/benchmark-data/data-raw/NASA Spacecraft Telemetry Data/train/E-9.npy -> /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP/E-9.train.csv\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[('NASA-SMAP', 'E-9') (test)] /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP/E-9.metadata.json already exists, but 'overwrite' was specified! Ignoring existing contents.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Processed source dataset /home/projects/akita/data/benchmark-data/data-raw/NASA Spacecraft Telemetry Data/test/E-9.npy -> /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP/E-9.test.csv\n",
      "Directories /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP already exist\n",
      "Processed source dataset /home/projects/akita/data/benchmark-data/data-raw/NASA Spacecraft Telemetry Data/train/E-10.npy -> /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP/E-10.train.csv\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[('NASA-SMAP', 'E-10') (test)] /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP/E-10.metadata.json already exists, but 'overwrite' was specified! Ignoring existing contents.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Processed source dataset /home/projects/akita/data/benchmark-data/data-raw/NASA Spacecraft Telemetry Data/test/E-10.npy -> /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP/E-10.test.csv\n",
      "Directories /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP already exist\n",
      "Processed source dataset /home/projects/akita/data/benchmark-data/data-raw/NASA Spacecraft Telemetry Data/train/E-11.npy -> /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP/E-11.train.csv\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[('NASA-SMAP', 'E-11') (test)] /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP/E-11.metadata.json already exists, but 'overwrite' was specified! Ignoring existing contents.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Processed source dataset /home/projects/akita/data/benchmark-data/data-raw/NASA Spacecraft Telemetry Data/test/E-11.npy -> /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP/E-11.test.csv\n",
      "Directories /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP already exist\n",
      "Processed source dataset /home/projects/akita/data/benchmark-data/data-raw/NASA Spacecraft Telemetry Data/train/E-12.npy -> /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP/E-12.train.csv\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[('NASA-SMAP', 'E-12') (test)] /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP/E-12.metadata.json already exists, but 'overwrite' was specified! Ignoring existing contents.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Processed source dataset /home/projects/akita/data/benchmark-data/data-raw/NASA Spacecraft Telemetry Data/test/E-12.npy -> /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP/E-12.test.csv\n",
      "Directories /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP already exist\n",
      "Processed source dataset /home/projects/akita/data/benchmark-data/data-raw/NASA Spacecraft Telemetry Data/train/E-13.npy -> /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP/E-13.train.csv\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[('NASA-SMAP', 'E-13') (test)] /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP/E-13.metadata.json already exists, but 'overwrite' was specified! Ignoring existing contents.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Processed source dataset /home/projects/akita/data/benchmark-data/data-raw/NASA Spacecraft Telemetry Data/test/E-13.npy -> /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP/E-13.test.csv\n",
      "Directories /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP already exist\n",
      "Processed source dataset /home/projects/akita/data/benchmark-data/data-raw/NASA Spacecraft Telemetry Data/train/A-1.npy -> /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP/A-1.train.csv\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[('NASA-SMAP', 'A-1') (test)] /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP/A-1.metadata.json already exists, but 'overwrite' was specified! Ignoring existing contents.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Processed source dataset /home/projects/akita/data/benchmark-data/data-raw/NASA Spacecraft Telemetry Data/test/A-1.npy -> /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP/A-1.test.csv\n",
      "Directories /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP already exist\n",
      "Processed source dataset /home/projects/akita/data/benchmark-data/data-raw/NASA Spacecraft Telemetry Data/train/D-1.npy -> /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP/D-1.train.csv\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[('NASA-SMAP', 'D-1') (test)] /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP/D-1.metadata.json already exists, but 'overwrite' was specified! Ignoring existing contents.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Processed source dataset /home/projects/akita/data/benchmark-data/data-raw/NASA Spacecraft Telemetry Data/test/D-1.npy -> /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP/D-1.test.csv\n",
      "Directories /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP already exist\n",
      "Processed source dataset /home/projects/akita/data/benchmark-data/data-raw/NASA Spacecraft Telemetry Data/train/P-2.npy -> /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP/P-2.train.csv\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[('NASA-SMAP', 'P-2') (test)] /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP/P-2.metadata.json already exists, but 'overwrite' was specified! Ignoring existing contents.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Processed source dataset /home/projects/akita/data/benchmark-data/data-raw/NASA Spacecraft Telemetry Data/test/P-2.npy -> /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP/P-2.test.csv\n",
      "Directories /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP already exist\n",
      "Processed source dataset /home/projects/akita/data/benchmark-data/data-raw/NASA Spacecraft Telemetry Data/train/P-3.npy -> /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP/P-3.train.csv\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[('NASA-SMAP', 'P-3') (test)] /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP/P-3.metadata.json already exists, but 'overwrite' was specified! Ignoring existing contents.\n",
      "[('NASA-SMAP', 'D-2') (train)] KPSS trend stationarity test for value encountered an error: cannot convert float NaN to integer\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Processed source dataset /home/projects/akita/data/benchmark-data/data-raw/NASA Spacecraft Telemetry Data/test/P-3.npy -> /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP/P-3.test.csv\n",
      "Directories /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP already exist\n",
      "Processed source dataset /home/projects/akita/data/benchmark-data/data-raw/NASA Spacecraft Telemetry Data/train/D-2.npy -> /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP/D-2.train.csv\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[('NASA-SMAP', 'D-2') (test)] /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP/D-2.metadata.json already exists, but 'overwrite' was specified! Ignoring existing contents.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Processed source dataset /home/projects/akita/data/benchmark-data/data-raw/NASA Spacecraft Telemetry Data/test/D-2.npy -> /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP/D-2.test.csv\n",
      "Directories /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP already exist\n",
      "Processed source dataset /home/projects/akita/data/benchmark-data/data-raw/NASA Spacecraft Telemetry Data/train/D-3.npy -> /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP/D-3.train.csv\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[('NASA-SMAP', 'D-3') (test)] /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP/D-3.metadata.json already exists, but 'overwrite' was specified! Ignoring existing contents.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Processed source dataset /home/projects/akita/data/benchmark-data/data-raw/NASA Spacecraft Telemetry Data/test/D-3.npy -> /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP/D-3.test.csv\n",
      "Directories /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP already exist\n",
      "Processed source dataset /home/projects/akita/data/benchmark-data/data-raw/NASA Spacecraft Telemetry Data/train/D-4.npy -> /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP/D-4.train.csv\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[('NASA-SMAP', 'D-4') (test)] /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP/D-4.metadata.json already exists, but 'overwrite' was specified! Ignoring existing contents.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Processed source dataset /home/projects/akita/data/benchmark-data/data-raw/NASA Spacecraft Telemetry Data/test/D-4.npy -> /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP/D-4.test.csv\n",
      "Directories /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP already exist\n",
      "Processed source dataset /home/projects/akita/data/benchmark-data/data-raw/NASA Spacecraft Telemetry Data/train/A-2.npy -> /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP/A-2.train.csv\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[('NASA-SMAP', 'A-2') (test)] /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP/A-2.metadata.json already exists, but 'overwrite' was specified! Ignoring existing contents.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Processed source dataset /home/projects/akita/data/benchmark-data/data-raw/NASA Spacecraft Telemetry Data/test/A-2.npy -> /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP/A-2.test.csv\n",
      "Directories /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP already exist\n",
      "Processed source dataset /home/projects/akita/data/benchmark-data/data-raw/NASA Spacecraft Telemetry Data/train/A-3.npy -> /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP/A-3.train.csv\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[('NASA-SMAP', 'A-3') (test)] /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP/A-3.metadata.json already exists, but 'overwrite' was specified! Ignoring existing contents.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Processed source dataset /home/projects/akita/data/benchmark-data/data-raw/NASA Spacecraft Telemetry Data/test/A-3.npy -> /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP/A-3.test.csv\n",
      "Directories /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP already exist\n",
      "Processed source dataset /home/projects/akita/data/benchmark-data/data-raw/NASA Spacecraft Telemetry Data/train/A-4.npy -> /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP/A-4.train.csv\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[('NASA-SMAP', 'A-4') (test)] /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP/A-4.metadata.json already exists, but 'overwrite' was specified! Ignoring existing contents.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Processed source dataset /home/projects/akita/data/benchmark-data/data-raw/NASA Spacecraft Telemetry Data/test/A-4.npy -> /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP/A-4.test.csv\n",
      "Directories /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP already exist\n",
      "Processed source dataset /home/projects/akita/data/benchmark-data/data-raw/NASA Spacecraft Telemetry Data/train/G-1.npy -> /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP/G-1.train.csv\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[('NASA-SMAP', 'G-1') (test)] /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP/G-1.metadata.json already exists, but 'overwrite' was specified! Ignoring existing contents.\n",
      "[('NASA-SMAP', 'G-2') (train)] KPSS trend stationarity test for value encountered an error: cannot convert float NaN to integer\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Processed source dataset /home/projects/akita/data/benchmark-data/data-raw/NASA Spacecraft Telemetry Data/test/G-1.npy -> /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP/G-1.test.csv\n",
      "Directories /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP already exist\n",
      "Processed source dataset /home/projects/akita/data/benchmark-data/data-raw/NASA Spacecraft Telemetry Data/train/G-2.npy -> /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP/G-2.train.csv\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[('NASA-SMAP', 'G-2') (test)] /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP/G-2.metadata.json already exists, but 'overwrite' was specified! Ignoring existing contents.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Processed source dataset /home/projects/akita/data/benchmark-data/data-raw/NASA Spacecraft Telemetry Data/test/G-2.npy -> /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP/G-2.test.csv\n",
      "Directories /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP already exist\n",
      "Processed source dataset /home/projects/akita/data/benchmark-data/data-raw/NASA Spacecraft Telemetry Data/train/D-5.npy -> /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP/D-5.train.csv\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[('NASA-SMAP', 'D-5') (test)] /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP/D-5.metadata.json already exists, but 'overwrite' was specified! Ignoring existing contents.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Processed source dataset /home/projects/akita/data/benchmark-data/data-raw/NASA Spacecraft Telemetry Data/test/D-5.npy -> /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP/D-5.test.csv\n",
      "Directories /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP already exist\n",
      "Processed source dataset /home/projects/akita/data/benchmark-data/data-raw/NASA Spacecraft Telemetry Data/train/D-6.npy -> /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP/D-6.train.csv\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[('NASA-SMAP', 'D-6') (test)] /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP/D-6.metadata.json already exists, but 'overwrite' was specified! Ignoring existing contents.\n",
      "[('NASA-SMAP', 'D-7') (train)] KPSS trend stationarity test for value encountered an error: cannot convert float NaN to integer\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Processed source dataset /home/projects/akita/data/benchmark-data/data-raw/NASA Spacecraft Telemetry Data/test/D-6.npy -> /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP/D-6.test.csv\n",
      "Directories /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP already exist\n",
      "Processed source dataset /home/projects/akita/data/benchmark-data/data-raw/NASA Spacecraft Telemetry Data/train/D-7.npy -> /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP/D-7.train.csv\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[('NASA-SMAP', 'D-7') (test)] /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP/D-7.metadata.json already exists, but 'overwrite' was specified! Ignoring existing contents.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Processed source dataset /home/projects/akita/data/benchmark-data/data-raw/NASA Spacecraft Telemetry Data/test/D-7.npy -> /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP/D-7.test.csv\n",
      "Directories /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP already exist\n",
      "Processed source dataset /home/projects/akita/data/benchmark-data/data-raw/NASA Spacecraft Telemetry Data/train/F-1.npy -> /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP/F-1.train.csv\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[('NASA-SMAP', 'F-1') (test)] /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP/F-1.metadata.json already exists, but 'overwrite' was specified! Ignoring existing contents.\n",
      "[('NASA-SMAP', 'P-4') (train)] KPSS trend stationarity test for value encountered an error: cannot convert float NaN to integer\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Processed source dataset /home/projects/akita/data/benchmark-data/data-raw/NASA Spacecraft Telemetry Data/test/F-1.npy -> /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP/F-1.test.csv\n",
      "Directories /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP already exist\n",
      "Processed source dataset /home/projects/akita/data/benchmark-data/data-raw/NASA Spacecraft Telemetry Data/train/P-4.npy -> /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP/P-4.train.csv\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[('NASA-SMAP', 'P-4') (test)] /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP/P-4.metadata.json already exists, but 'overwrite' was specified! Ignoring existing contents.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Processed source dataset /home/projects/akita/data/benchmark-data/data-raw/NASA Spacecraft Telemetry Data/test/P-4.npy -> /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP/P-4.test.csv\n",
      "Directories /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP already exist\n",
      "Processed source dataset /home/projects/akita/data/benchmark-data/data-raw/NASA Spacecraft Telemetry Data/train/G-3.npy -> /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP/G-3.train.csv\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[('NASA-SMAP', 'G-3') (test)] /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP/G-3.metadata.json already exists, but 'overwrite' was specified! Ignoring existing contents.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Processed source dataset /home/projects/akita/data/benchmark-data/data-raw/NASA Spacecraft Telemetry Data/test/G-3.npy -> /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP/G-3.test.csv\n",
      "Directories /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP already exist\n",
      "Processed source dataset /home/projects/akita/data/benchmark-data/data-raw/NASA Spacecraft Telemetry Data/train/T-1.npy -> /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP/T-1.train.csv\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[('NASA-SMAP', 'T-1') (test)] /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP/T-1.metadata.json already exists, but 'overwrite' was specified! Ignoring existing contents.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Processed source dataset /home/projects/akita/data/benchmark-data/data-raw/NASA Spacecraft Telemetry Data/test/T-1.npy -> /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP/T-1.test.csv\n",
      "Directories /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP already exist\n",
      "Processed source dataset /home/projects/akita/data/benchmark-data/data-raw/NASA Spacecraft Telemetry Data/train/T-2.npy -> /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP/T-2.train.csv\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[('NASA-SMAP', 'T-2') (test)] /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP/T-2.metadata.json already exists, but 'overwrite' was specified! Ignoring existing contents.\n",
      "[('NASA-SMAP', 'D-8') (train)] KPSS trend stationarity test for value encountered an error: cannot convert float NaN to integer\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Processed source dataset /home/projects/akita/data/benchmark-data/data-raw/NASA Spacecraft Telemetry Data/test/T-2.npy -> /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP/T-2.test.csv\n",
      "Directories /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP already exist\n",
      "Processed source dataset /home/projects/akita/data/benchmark-data/data-raw/NASA Spacecraft Telemetry Data/train/D-8.npy -> /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP/D-8.train.csv\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[('NASA-SMAP', 'D-8') (test)] /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP/D-8.metadata.json already exists, but 'overwrite' was specified! Ignoring existing contents.\n",
      "[('NASA-SMAP', 'D-9') (train)] KPSS trend stationarity test for value encountered an error: cannot convert float NaN to integer\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Processed source dataset /home/projects/akita/data/benchmark-data/data-raw/NASA Spacecraft Telemetry Data/test/D-8.npy -> /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP/D-8.test.csv\n",
      "Directories /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP already exist\n",
      "Processed source dataset /home/projects/akita/data/benchmark-data/data-raw/NASA Spacecraft Telemetry Data/train/D-9.npy -> /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP/D-9.train.csv\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[('NASA-SMAP', 'D-9') (test)] /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP/D-9.metadata.json already exists, but 'overwrite' was specified! Ignoring existing contents.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Processed source dataset /home/projects/akita/data/benchmark-data/data-raw/NASA Spacecraft Telemetry Data/test/D-9.npy -> /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP/D-9.test.csv\n",
      "Directories /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP already exist\n",
      "Processed source dataset /home/projects/akita/data/benchmark-data/data-raw/NASA Spacecraft Telemetry Data/train/F-2.npy -> /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP/F-2.train.csv\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[('NASA-SMAP', 'F-2') (test)] /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP/F-2.metadata.json already exists, but 'overwrite' was specified! Ignoring existing contents.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Processed source dataset /home/projects/akita/data/benchmark-data/data-raw/NASA Spacecraft Telemetry Data/test/F-2.npy -> /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP/F-2.test.csv\n",
      "Directories /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP already exist\n",
      "Processed source dataset /home/projects/akita/data/benchmark-data/data-raw/NASA Spacecraft Telemetry Data/train/G-4.npy -> /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP/G-4.train.csv\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[('NASA-SMAP', 'G-4') (test)] /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP/G-4.metadata.json already exists, but 'overwrite' was specified! Ignoring existing contents.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Processed source dataset /home/projects/akita/data/benchmark-data/data-raw/NASA Spacecraft Telemetry Data/test/G-4.npy -> /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP/G-4.test.csv\n",
      "Directories /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP already exist\n",
      "Processed source dataset /home/projects/akita/data/benchmark-data/data-raw/NASA Spacecraft Telemetry Data/train/T-3.npy -> /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP/T-3.train.csv\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[('NASA-SMAP', 'T-3') (test)] /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP/T-3.metadata.json already exists, but 'overwrite' was specified! Ignoring existing contents.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Processed source dataset /home/projects/akita/data/benchmark-data/data-raw/NASA Spacecraft Telemetry Data/test/T-3.npy -> /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP/T-3.test.csv\n",
      "Directories /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP already exist\n",
      "Processed source dataset /home/projects/akita/data/benchmark-data/data-raw/NASA Spacecraft Telemetry Data/train/D-11.npy -> /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP/D-11.train.csv\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[('NASA-SMAP', 'D-11') (test)] /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP/D-11.metadata.json already exists, but 'overwrite' was specified! Ignoring existing contents.\n",
      "[('NASA-SMAP', 'D-12') (train)] KPSS trend stationarity test for value encountered an error: cannot convert float NaN to integer\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Processed source dataset /home/projects/akita/data/benchmark-data/data-raw/NASA Spacecraft Telemetry Data/test/D-11.npy -> /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP/D-11.test.csv\n",
      "Directories /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP already exist\n",
      "Processed source dataset /home/projects/akita/data/benchmark-data/data-raw/NASA Spacecraft Telemetry Data/train/D-12.npy -> /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP/D-12.train.csv\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[('NASA-SMAP', 'D-12') (test)] /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP/D-12.metadata.json already exists, but 'overwrite' was specified! Ignoring existing contents.\n",
      "[('NASA-SMAP', 'B-1') (train)] KPSS trend stationarity test for value encountered an error: cannot convert float NaN to integer\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Processed source dataset /home/projects/akita/data/benchmark-data/data-raw/NASA Spacecraft Telemetry Data/test/D-12.npy -> /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP/D-12.test.csv\n",
      "Directories /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP already exist\n",
      "Processed source dataset /home/projects/akita/data/benchmark-data/data-raw/NASA Spacecraft Telemetry Data/train/B-1.npy -> /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP/B-1.train.csv\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[('NASA-SMAP', 'B-1') (test)] /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP/B-1.metadata.json already exists, but 'overwrite' was specified! Ignoring existing contents.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Processed source dataset /home/projects/akita/data/benchmark-data/data-raw/NASA Spacecraft Telemetry Data/test/B-1.npy -> /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP/B-1.test.csv\n",
      "Directories /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP already exist\n",
      "Processed source dataset /home/projects/akita/data/benchmark-data/data-raw/NASA Spacecraft Telemetry Data/train/G-6.npy -> /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP/G-6.train.csv\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[('NASA-SMAP', 'G-6') (test)] /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP/G-6.metadata.json already exists, but 'overwrite' was specified! Ignoring existing contents.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Processed source dataset /home/projects/akita/data/benchmark-data/data-raw/NASA Spacecraft Telemetry Data/test/G-6.npy -> /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP/G-6.test.csv\n",
      "Directories /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP already exist\n",
      "Processed source dataset /home/projects/akita/data/benchmark-data/data-raw/NASA Spacecraft Telemetry Data/train/G-7.npy -> /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP/G-7.train.csv\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[('NASA-SMAP', 'G-7') (test)] /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP/G-7.metadata.json already exists, but 'overwrite' was specified! Ignoring existing contents.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Processed source dataset /home/projects/akita/data/benchmark-data/data-raw/NASA Spacecraft Telemetry Data/test/G-7.npy -> /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP/G-7.test.csv\n",
      "Directories /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP already exist\n",
      "Processed source dataset /home/projects/akita/data/benchmark-data/data-raw/NASA Spacecraft Telemetry Data/train/P-7.npy -> /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP/P-7.train.csv\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[('NASA-SMAP', 'P-7') (test)] /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP/P-7.metadata.json already exists, but 'overwrite' was specified! Ignoring existing contents.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Processed source dataset /home/projects/akita/data/benchmark-data/data-raw/NASA Spacecraft Telemetry Data/test/P-7.npy -> /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP/P-7.test.csv\n",
      "Directories /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP already exist\n",
      "Processed source dataset /home/projects/akita/data/benchmark-data/data-raw/NASA Spacecraft Telemetry Data/train/R-1.npy -> /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP/R-1.train.csv\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[('NASA-SMAP', 'R-1') (test)] /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP/R-1.metadata.json already exists, but 'overwrite' was specified! Ignoring existing contents.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Processed source dataset /home/projects/akita/data/benchmark-data/data-raw/NASA Spacecraft Telemetry Data/test/R-1.npy -> /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP/R-1.test.csv\n",
      "Directories /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP already exist\n",
      "Processed source dataset /home/projects/akita/data/benchmark-data/data-raw/NASA Spacecraft Telemetry Data/train/A-5.npy -> /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP/A-5.train.csv\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[('NASA-SMAP', 'A-5') (test)] /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP/A-5.metadata.json already exists, but 'overwrite' was specified! Ignoring existing contents.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Processed source dataset /home/projects/akita/data/benchmark-data/data-raw/NASA Spacecraft Telemetry Data/test/A-5.npy -> /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP/A-5.test.csv\n",
      "Directories /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP already exist\n",
      "Processed source dataset /home/projects/akita/data/benchmark-data/data-raw/NASA Spacecraft Telemetry Data/train/A-6.npy -> /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP/A-6.train.csv\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[('NASA-SMAP', 'A-6') (test)] /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP/A-6.metadata.json already exists, but 'overwrite' was specified! Ignoring existing contents.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Processed source dataset /home/projects/akita/data/benchmark-data/data-raw/NASA Spacecraft Telemetry Data/test/A-6.npy -> /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP/A-6.test.csv\n",
      "Directories /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP already exist\n",
      "Processed source dataset /home/projects/akita/data/benchmark-data/data-raw/NASA Spacecraft Telemetry Data/train/A-7.npy -> /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP/A-7.train.csv\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[('NASA-SMAP', 'A-7') (test)] /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP/A-7.metadata.json already exists, but 'overwrite' was specified! Ignoring existing contents.\n",
      "[('NASA-SMAP', 'D-13') (train)] KPSS trend stationarity test for value encountered an error: cannot convert float NaN to integer\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Processed source dataset /home/projects/akita/data/benchmark-data/data-raw/NASA Spacecraft Telemetry Data/test/A-7.npy -> /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP/A-7.test.csv\n",
      "Directories /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP already exist\n",
      "Processed source dataset /home/projects/akita/data/benchmark-data/data-raw/NASA Spacecraft Telemetry Data/train/D-13.npy -> /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP/D-13.train.csv\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[('NASA-SMAP', 'D-13') (test)] /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP/D-13.metadata.json already exists, but 'overwrite' was specified! Ignoring existing contents.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Processed source dataset /home/projects/akita/data/benchmark-data/data-raw/NASA Spacecraft Telemetry Data/test/D-13.npy -> /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP/D-13.test.csv\n",
      "Directories /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP already exist\n",
      "Processed source dataset /home/projects/akita/data/benchmark-data/data-raw/NASA Spacecraft Telemetry Data/train/P-2.npy -> /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP/P-2.train.csv\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[('NASA-SMAP', 'P-2') (test)] /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP/P-2.metadata.json already exists, but 'overwrite' was specified! Ignoring existing contents.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Processed source dataset /home/projects/akita/data/benchmark-data/data-raw/NASA Spacecraft Telemetry Data/test/P-2.npy -> /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP/P-2.test.csv\n",
      "Directories /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP already exist\n",
      "Processed source dataset /home/projects/akita/data/benchmark-data/data-raw/NASA Spacecraft Telemetry Data/train/A-8.npy -> /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP/A-8.train.csv\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[('NASA-SMAP', 'A-8') (test)] /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP/A-8.metadata.json already exists, but 'overwrite' was specified! Ignoring existing contents.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Processed source dataset /home/projects/akita/data/benchmark-data/data-raw/NASA Spacecraft Telemetry Data/test/A-8.npy -> /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP/A-8.test.csv\n",
      "Directories /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP already exist\n",
      "Processed source dataset /home/projects/akita/data/benchmark-data/data-raw/NASA Spacecraft Telemetry Data/train/A-9.npy -> /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP/A-9.train.csv\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[('NASA-SMAP', 'A-9') (test)] /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP/A-9.metadata.json already exists, but 'overwrite' was specified! Ignoring existing contents.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Processed source dataset /home/projects/akita/data/benchmark-data/data-raw/NASA Spacecraft Telemetry Data/test/A-9.npy -> /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP/A-9.test.csv\n",
      "Directories /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP already exist\n",
      "Processed source dataset /home/projects/akita/data/benchmark-data/data-raw/NASA Spacecraft Telemetry Data/train/F-3.npy -> /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP/F-3.train.csv\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[('NASA-SMAP', 'F-3') (test)] /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP/F-3.metadata.json already exists, but 'overwrite' was specified! Ignoring existing contents.\n",
      "[('NASA-MSL', 'M-6') (train)] KPSS trend stationarity test for value encountered an error: cannot convert float NaN to integer\n",
      "[('NASA-MSL', 'M-6') (test)] /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-MSL/M-6.metadata.json already exists, but 'overwrite' was specified! Ignoring existing contents.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Processed source dataset /home/projects/akita/data/benchmark-data/data-raw/NASA Spacecraft Telemetry Data/test/F-3.npy -> /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-SMAP/F-3.test.csv\n",
      "Directories /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-MSL already exist\n",
      "Processed source dataset /home/projects/akita/data/benchmark-data/data-raw/NASA Spacecraft Telemetry Data/train/M-6.npy -> /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-MSL/M-6.train.csv\n",
      "Processed source dataset /home/projects/akita/data/benchmark-data/data-raw/NASA Spacecraft Telemetry Data/test/M-6.npy -> /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-MSL/M-6.test.csv\n",
      "Directories /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-MSL already exist\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[('NASA-MSL', 'M-1') (test)] /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-MSL/M-1.metadata.json already exists, but 'overwrite' was specified! Ignoring existing contents.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Processed source dataset /home/projects/akita/data/benchmark-data/data-raw/NASA Spacecraft Telemetry Data/train/M-1.npy -> /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-MSL/M-1.train.csv\n",
      "Processed source dataset /home/projects/akita/data/benchmark-data/data-raw/NASA Spacecraft Telemetry Data/test/M-1.npy -> /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-MSL/M-1.test.csv\n",
      "Directories /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-MSL already exist\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[('NASA-MSL', 'M-2') (test)] /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-MSL/M-2.metadata.json already exists, but 'overwrite' was specified! Ignoring existing contents.\n",
      "[('NASA-MSL', 'S-2') (train)] KPSS trend stationarity test for value encountered an error: cannot convert float NaN to integer\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Processed source dataset /home/projects/akita/data/benchmark-data/data-raw/NASA Spacecraft Telemetry Data/train/M-2.npy -> /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-MSL/M-2.train.csv\n",
      "Processed source dataset /home/projects/akita/data/benchmark-data/data-raw/NASA Spacecraft Telemetry Data/test/M-2.npy -> /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-MSL/M-2.test.csv\n",
      "Directories /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-MSL already exist\n",
      "Processed source dataset /home/projects/akita/data/benchmark-data/data-raw/NASA Spacecraft Telemetry Data/train/S-2.npy -> /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-MSL/S-2.train.csv\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[('NASA-MSL', 'S-2') (test)] /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-MSL/S-2.metadata.json already exists, but 'overwrite' was specified! Ignoring existing contents.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Processed source dataset /home/projects/akita/data/benchmark-data/data-raw/NASA Spacecraft Telemetry Data/test/S-2.npy -> /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-MSL/S-2.test.csv\n",
      "Directories /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-MSL already exist\n",
      "Processed source dataset /home/projects/akita/data/benchmark-data/data-raw/NASA Spacecraft Telemetry Data/train/P-10.npy -> /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-MSL/P-10.train.csv\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[('NASA-MSL', 'P-10') (test)] /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-MSL/P-10.metadata.json already exists, but 'overwrite' was specified! Ignoring existing contents.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Processed source dataset /home/projects/akita/data/benchmark-data/data-raw/NASA Spacecraft Telemetry Data/test/P-10.npy -> /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-MSL/P-10.test.csv\n",
      "Directories /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-MSL already exist\n",
      "Processed source dataset /home/projects/akita/data/benchmark-data/data-raw/NASA Spacecraft Telemetry Data/train/T-4.npy -> /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-MSL/T-4.train.csv\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[('NASA-MSL', 'T-4') (test)] /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-MSL/T-4.metadata.json already exists, but 'overwrite' was specified! Ignoring existing contents.\n",
      "[('NASA-MSL', 'T-5') (train)] KPSS trend stationarity test for value encountered an error: cannot convert float NaN to integer\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Processed source dataset /home/projects/akita/data/benchmark-data/data-raw/NASA Spacecraft Telemetry Data/test/T-4.npy -> /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-MSL/T-4.test.csv\n",
      "Directories /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-MSL already exist\n",
      "Processed source dataset /home/projects/akita/data/benchmark-data/data-raw/NASA Spacecraft Telemetry Data/train/T-5.npy -> /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-MSL/T-5.train.csv\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[('NASA-MSL', 'T-5') (test)] /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-MSL/T-5.metadata.json already exists, but 'overwrite' was specified! Ignoring existing contents.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Processed source dataset /home/projects/akita/data/benchmark-data/data-raw/NASA Spacecraft Telemetry Data/test/T-5.npy -> /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-MSL/T-5.test.csv\n",
      "Directories /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-MSL already exist\n",
      "Processed source dataset /home/projects/akita/data/benchmark-data/data-raw/NASA Spacecraft Telemetry Data/train/F-7.npy -> /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-MSL/F-7.train.csv\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[('NASA-MSL', 'F-7') (test)] /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-MSL/F-7.metadata.json already exists, but 'overwrite' was specified! Ignoring existing contents.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Processed source dataset /home/projects/akita/data/benchmark-data/data-raw/NASA Spacecraft Telemetry Data/test/F-7.npy -> /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-MSL/F-7.test.csv\n",
      "Directories /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-MSL already exist\n",
      "Processed source dataset /home/projects/akita/data/benchmark-data/data-raw/NASA Spacecraft Telemetry Data/train/M-3.npy -> /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-MSL/M-3.train.csv\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[('NASA-MSL', 'M-3') (test)] /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-MSL/M-3.metadata.json already exists, but 'overwrite' was specified! Ignoring existing contents.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Processed source dataset /home/projects/akita/data/benchmark-data/data-raw/NASA Spacecraft Telemetry Data/test/M-3.npy -> /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-MSL/M-3.test.csv\n",
      "Directories /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-MSL already exist\n",
      "Processed source dataset /home/projects/akita/data/benchmark-data/data-raw/NASA Spacecraft Telemetry Data/train/M-4.npy -> /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-MSL/M-4.train.csv\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[('NASA-MSL', 'M-4') (test)] /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-MSL/M-4.metadata.json already exists, but 'overwrite' was specified! Ignoring existing contents.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Processed source dataset /home/projects/akita/data/benchmark-data/data-raw/NASA Spacecraft Telemetry Data/test/M-4.npy -> /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-MSL/M-4.test.csv\n",
      "Directories /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-MSL already exist\n",
      "Processed source dataset /home/projects/akita/data/benchmark-data/data-raw/NASA Spacecraft Telemetry Data/train/M-5.npy -> /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-MSL/M-5.train.csv\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[('NASA-MSL', 'M-5') (test)] /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-MSL/M-5.metadata.json already exists, but 'overwrite' was specified! Ignoring existing contents.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Processed source dataset /home/projects/akita/data/benchmark-data/data-raw/NASA Spacecraft Telemetry Data/test/M-5.npy -> /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-MSL/M-5.test.csv\n",
      "Directories /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-MSL already exist\n",
      "Processed source dataset /home/projects/akita/data/benchmark-data/data-raw/NASA Spacecraft Telemetry Data/train/P-15.npy -> /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-MSL/P-15.train.csv\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[('NASA-MSL', 'P-15') (test)] /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-MSL/P-15.metadata.json already exists, but 'overwrite' was specified! Ignoring existing contents.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Processed source dataset /home/projects/akita/data/benchmark-data/data-raw/NASA Spacecraft Telemetry Data/test/P-15.npy -> /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-MSL/P-15.test.csv\n",
      "Directories /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-MSL already exist\n",
      "Processed source dataset /home/projects/akita/data/benchmark-data/data-raw/NASA Spacecraft Telemetry Data/train/C-1.npy -> /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-MSL/C-1.train.csv\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[('NASA-MSL', 'C-1') (test)] /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-MSL/C-1.metadata.json already exists, but 'overwrite' was specified! Ignoring existing contents.\n",
      "[('NASA-MSL', 'C-2') (train)] KPSS trend stationarity test for value encountered an error: cannot convert float NaN to integer\n",
      "[('NASA-MSL', 'C-2') (test)] /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-MSL/C-2.metadata.json already exists, but 'overwrite' was specified! Ignoring existing contents.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Processed source dataset /home/projects/akita/data/benchmark-data/data-raw/NASA Spacecraft Telemetry Data/test/C-1.npy -> /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-MSL/C-1.test.csv\n",
      "Directories /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-MSL already exist\n",
      "Processed source dataset /home/projects/akita/data/benchmark-data/data-raw/NASA Spacecraft Telemetry Data/train/C-2.npy -> /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-MSL/C-2.train.csv\n",
      "Processed source dataset /home/projects/akita/data/benchmark-data/data-raw/NASA Spacecraft Telemetry Data/test/C-2.npy -> /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-MSL/C-2.test.csv\n",
      "Directories /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-MSL already exist\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[('NASA-MSL', 'T-12') (test)] /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-MSL/T-12.metadata.json already exists, but 'overwrite' was specified! Ignoring existing contents.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Processed source dataset /home/projects/akita/data/benchmark-data/data-raw/NASA Spacecraft Telemetry Data/train/T-12.npy -> /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-MSL/T-12.train.csv\n",
      "Processed source dataset /home/projects/akita/data/benchmark-data/data-raw/NASA Spacecraft Telemetry Data/test/T-12.npy -> /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-MSL/T-12.test.csv\n",
      "Directories /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-MSL already exist\n",
      "Processed source dataset /home/projects/akita/data/benchmark-data/data-raw/NASA Spacecraft Telemetry Data/train/T-13.npy -> /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-MSL/T-13.train.csv\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[('NASA-MSL', 'T-13') (test)] /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-MSL/T-13.metadata.json already exists, but 'overwrite' was specified! Ignoring existing contents.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Processed source dataset /home/projects/akita/data/benchmark-data/data-raw/NASA Spacecraft Telemetry Data/test/T-13.npy -> /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-MSL/T-13.test.csv\n",
      "Directories /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-MSL already exist\n",
      "Processed source dataset /home/projects/akita/data/benchmark-data/data-raw/NASA Spacecraft Telemetry Data/train/F-4.npy -> /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-MSL/F-4.train.csv\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[('NASA-MSL', 'F-4') (test)] /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-MSL/F-4.metadata.json already exists, but 'overwrite' was specified! Ignoring existing contents.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Processed source dataset /home/projects/akita/data/benchmark-data/data-raw/NASA Spacecraft Telemetry Data/test/F-4.npy -> /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-MSL/F-4.test.csv\n",
      "Directories /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-MSL already exist\n",
      "Processed source dataset /home/projects/akita/data/benchmark-data/data-raw/NASA Spacecraft Telemetry Data/train/F-5.npy -> /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-MSL/F-5.train.csv\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[('NASA-MSL', 'F-5') (test)] /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-MSL/F-5.metadata.json already exists, but 'overwrite' was specified! Ignoring existing contents.\n",
      "[('NASA-MSL', 'D-14') (train)] KPSS trend stationarity test for value encountered an error: cannot convert float NaN to integer\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Processed source dataset /home/projects/akita/data/benchmark-data/data-raw/NASA Spacecraft Telemetry Data/test/F-5.npy -> /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-MSL/F-5.test.csv\n",
      "Directories /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-MSL already exist\n",
      "Processed source dataset /home/projects/akita/data/benchmark-data/data-raw/NASA Spacecraft Telemetry Data/train/D-14.npy -> /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-MSL/D-14.train.csv\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[('NASA-MSL', 'D-14') (test)] /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-MSL/D-14.metadata.json already exists, but 'overwrite' was specified! Ignoring existing contents.\n",
      "[('NASA-MSL', 'T-9') (test)] /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-MSL/T-9.metadata.json already exists, but 'overwrite' was specified! Ignoring existing contents.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Processed source dataset /home/projects/akita/data/benchmark-data/data-raw/NASA Spacecraft Telemetry Data/test/D-14.npy -> /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-MSL/D-14.test.csv\n",
      "Directories /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-MSL already exist\n",
      "Processed source dataset /home/projects/akita/data/benchmark-data/data-raw/NASA Spacecraft Telemetry Data/train/T-9.npy -> /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-MSL/T-9.train.csv\n",
      "Processed source dataset /home/projects/akita/data/benchmark-data/data-raw/NASA Spacecraft Telemetry Data/test/T-9.npy -> /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-MSL/T-9.test.csv\n",
      "Directories /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-MSL already exist\n",
      "Processed source dataset /home/projects/akita/data/benchmark-data/data-raw/NASA Spacecraft Telemetry Data/train/P-14.npy -> /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-MSL/P-14.train.csv\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[('NASA-MSL', 'P-14') (test)] /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-MSL/P-14.metadata.json already exists, but 'overwrite' was specified! Ignoring existing contents.\n",
      "[('NASA-MSL', 'T-8') (test)] /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-MSL/T-8.metadata.json already exists, but 'overwrite' was specified! Ignoring existing contents.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Processed source dataset /home/projects/akita/data/benchmark-data/data-raw/NASA Spacecraft Telemetry Data/test/P-14.npy -> /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-MSL/P-14.test.csv\n",
      "Directories /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-MSL already exist\n",
      "Processed source dataset /home/projects/akita/data/benchmark-data/data-raw/NASA Spacecraft Telemetry Data/train/T-8.npy -> /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-MSL/T-8.train.csv\n",
      "Processed source dataset /home/projects/akita/data/benchmark-data/data-raw/NASA Spacecraft Telemetry Data/test/T-8.npy -> /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-MSL/T-8.test.csv\n",
      "Directories /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-MSL already exist\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[('NASA-MSL', 'P-11') (test)] /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-MSL/P-11.metadata.json already exists, but 'overwrite' was specified! Ignoring existing contents.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Processed source dataset /home/projects/akita/data/benchmark-data/data-raw/NASA Spacecraft Telemetry Data/train/P-11.npy -> /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-MSL/P-11.train.csv\n",
      "Processed source dataset /home/projects/akita/data/benchmark-data/data-raw/NASA Spacecraft Telemetry Data/test/P-11.npy -> /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-MSL/P-11.test.csv\n",
      "Directories /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-MSL already exist\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[('NASA-MSL', 'D-15') (test)] /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-MSL/D-15.metadata.json already exists, but 'overwrite' was specified! Ignoring existing contents.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Processed source dataset /home/projects/akita/data/benchmark-data/data-raw/NASA Spacecraft Telemetry Data/train/D-15.npy -> /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-MSL/D-15.train.csv\n",
      "Processed source dataset /home/projects/akita/data/benchmark-data/data-raw/NASA Spacecraft Telemetry Data/test/D-15.npy -> /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-MSL/D-15.test.csv\n",
      "Directories /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-MSL already exist\n",
      "Processed source dataset /home/projects/akita/data/benchmark-data/data-raw/NASA Spacecraft Telemetry Data/train/D-16.npy -> /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-MSL/D-16.train.csv\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[('NASA-MSL', 'D-16') (test)] /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-MSL/D-16.metadata.json already exists, but 'overwrite' was specified! Ignoring existing contents.\n",
      "[('NASA-MSL', 'M-7') (test)] /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-MSL/M-7.metadata.json already exists, but 'overwrite' was specified! Ignoring existing contents.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Processed source dataset /home/projects/akita/data/benchmark-data/data-raw/NASA Spacecraft Telemetry Data/test/D-16.npy -> /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-MSL/D-16.test.csv\n",
      "Directories /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-MSL already exist\n",
      "Processed source dataset /home/projects/akita/data/benchmark-data/data-raw/NASA Spacecraft Telemetry Data/train/M-7.npy -> /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-MSL/M-7.train.csv\n",
      "Processed source dataset /home/projects/akita/data/benchmark-data/data-raw/NASA Spacecraft Telemetry Data/test/M-7.npy -> /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-MSL/M-7.test.csv\n",
      "Directories /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-MSL already exist\n",
      "Processed source dataset /home/projects/akita/data/benchmark-data/data-raw/NASA Spacecraft Telemetry Data/train/F-8.npy -> /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-MSL/F-8.train.csv\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[('NASA-MSL', 'F-8') (test)] /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-MSL/F-8.metadata.json already exists, but 'overwrite' was specified! Ignoring existing contents.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Processed source dataset /home/projects/akita/data/benchmark-data/data-raw/NASA Spacecraft Telemetry Data/test/F-8.npy -> /home/projects/akita/data/benchmark-data/data-processed/univariate/NASA-MSL/F-8.test.csv\n"
     ]
    }
   ],
   "source": [
    "# dataset transformation\n",
    "transform_file: Callable[[str, str, List[str], bool], None] = transform_and_label\n",
    "\n",
    "meta = pd.read_csv(os.path.join(source_folder, \"labeled_anomalies.csv\"))\n",
    "\n",
    "for _, dataset in meta.iterrows():\n",
    "    \n",
    "    dataset_name = dataset[\"chan_id\"]\n",
    "    collection_name = dataset_collection_prefix + \"-\" + dataset[\"spacecraft\"]\n",
    "    dataset_length = dataset[\"num_values\"]\n",
    "    dataset_subfolder, target_subfolder = create_target_subfolder(input_type, collection_name)\n",
    "    \n",
    "    windows = json.loads(dataset[\"anomaly_sequences\"])\n",
    "    \n",
    "    meta = None\n",
    "    paths = {}\n",
    "    for t_type in [\"train\", \"test\"]:\n",
    "        source_file = os.path.join(source_folder, t_type, dataset_name + \".npy\")\n",
    "        filename = f\"{dataset_name}.{t_type}.csv\"\n",
    "        path = os.path.join(dataset_subfolder, filename)\n",
    "        target_filepath = os.path.join(target_subfolder, filename)\n",
    "        meta_target_filepath = os.path.join(target_subfolder, f\"{dataset_name}.{Datasets.METADATA_FILENAME_PREFIX}\")\n",
    "        paths[t_type] = path\n",
    "        \n",
    "        # transform file\n",
    "        df = transform_file(source_file, target_filepath, windows, force_all_normal=(t_type == \"train\"))\n",
    "        da = DatasetAnalyzer((collection_name, dataset_name), is_train=(t_type == \"train\"), df=df)\n",
    "        da.save_to_json(meta_target_filepath, overwrite=(t_type != \"train\"))\n",
    "        if t_type != \"train\":\n",
    "            meta = da.metadata\n",
    "        print(f\"Processed source dataset {source_file} -> {target_filepath}\")\n",
    "\n",
    "    # save metadata\n",
    "    dm.add_dataset(DatasetRecord(\n",
    "        collection_name=collection_name,\n",
    "        dataset_name=dataset_name,\n",
    "        train_path=paths[\"train\"],\n",
    "        test_path=paths[\"test\"],\n",
    "        dataset_type=dataset_type,\n",
    "        datetime_index=datetime_index,\n",
    "        split_at=None,\n",
    "        train_type=train_type,\n",
    "        train_is_normal=train_is_normal,\n",
    "        input_type=input_type,\n",
    "        length=meta.length,\n",
    "        dimensions=meta.dimensions,\n",
    "        contamination=meta.contamination,\n",
    "        num_anomalies=meta.num_anomalies,\n",
    "        min_anomaly_length=meta.anomaly_length.min,\n",
    "        median_anomaly_length=meta.anomaly_length.median,\n",
    "        max_anomaly_length=meta.anomaly_length.max,\n",
    "        mean=meta.mean,\n",
    "        stddev=meta.stddev,\n",
    "        trend=meta.trend,\n",
    "        stationarity=meta.get_stationarity_name(),\n",
    "    ))\n",
    "\n",
    "# save metadata of benchmark\n",
    "dm.save()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th>train_path</th>\n",
       "      <th>test_path</th>\n",
       "      <th>dataset_type</th>\n",
       "      <th>datetime_index</th>\n",
       "      <th>split_at</th>\n",
       "      <th>train_type</th>\n",
       "      <th>train_is_normal</th>\n",
       "      <th>input_type</th>\n",
       "      <th>length</th>\n",
       "      <th>dimensions</th>\n",
       "      <th>contamination</th>\n",
       "      <th>num_anomalies</th>\n",
       "      <th>min_anomaly_length</th>\n",
       "      <th>median_anomaly_length</th>\n",
       "      <th>max_anomaly_length</th>\n",
       "      <th>mean</th>\n",
       "      <th>stddev</th>\n",
       "      <th>trend</th>\n",
       "      <th>stationarity</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>collection_name</th>\n",
       "      <th>dataset_name</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th rowspan=\"5\" valign=\"top\">NASA-MSL</th>\n",
       "      <th>C-1</th>\n",
       "      <td>univariate/NASA-MSL/C-1.train.csv</td>\n",
       "      <td>univariate/NASA-MSL/C-1.test.csv</td>\n",
       "      <td>real</td>\n",
       "      <td>False</td>\n",
       "      <td>NaN</td>\n",
       "      <td>semi-supervised</td>\n",
       "      <td>True</td>\n",
       "      <td>univariate</td>\n",
       "      <td>2264</td>\n",
       "      <td>1</td>\n",
       "      <td>0.137809</td>\n",
       "      <td>2</td>\n",
       "      <td>111</td>\n",
       "      <td>156</td>\n",
       "      <td>201</td>\n",
       "      <td>-0.596494</td>\n",
       "      <td>0.422230</td>\n",
       "      <td>no trend</td>\n",
       "      <td>difference_stationary</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>C-2</th>\n",
       "      <td>univariate/NASA-MSL/C-2.train.csv</td>\n",
       "      <td>univariate/NASA-MSL/C-2.test.csv</td>\n",
       "      <td>real</td>\n",
       "      <td>False</td>\n",
       "      <td>NaN</td>\n",
       "      <td>semi-supervised</td>\n",
       "      <td>True</td>\n",
       "      <td>univariate</td>\n",
       "      <td>2051</td>\n",
       "      <td>1</td>\n",
       "      <td>0.066797</td>\n",
       "      <td>2</td>\n",
       "      <td>36</td>\n",
       "      <td>68</td>\n",
       "      <td>101</td>\n",
       "      <td>-0.527869</td>\n",
       "      <td>0.469327</td>\n",
       "      <td>no trend</td>\n",
       "      <td>difference_stationary</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>D-14</th>\n",
       "      <td>univariate/NASA-MSL/D-14.train.csv</td>\n",
       "      <td>univariate/NASA-MSL/D-14.test.csv</td>\n",
       "      <td>real</td>\n",
       "      <td>False</td>\n",
       "      <td>NaN</td>\n",
       "      <td>semi-supervised</td>\n",
       "      <td>True</td>\n",
       "      <td>univariate</td>\n",
       "      <td>2625</td>\n",
       "      <td>1</td>\n",
       "      <td>0.084571</td>\n",
       "      <td>2</td>\n",
       "      <td>21</td>\n",
       "      <td>111</td>\n",
       "      <td>201</td>\n",
       "      <td>-0.859483</td>\n",
       "      <td>0.509250</td>\n",
       "      <td>no trend</td>\n",
       "      <td>stationary</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>D-15</th>\n",
       "      <td>univariate/NASA-MSL/D-15.train.csv</td>\n",
       "      <td>univariate/NASA-MSL/D-15.test.csv</td>\n",
       "      <td>real</td>\n",
       "      <td>False</td>\n",
       "      <td>NaN</td>\n",
       "      <td>semi-supervised</td>\n",
       "      <td>True</td>\n",
       "      <td>univariate</td>\n",
       "      <td>2158</td>\n",
       "      <td>1</td>\n",
       "      <td>0.297034</td>\n",
       "      <td>1</td>\n",
       "      <td>641</td>\n",
       "      <td>641</td>\n",
       "      <td>641</td>\n",
       "      <td>0.767703</td>\n",
       "      <td>0.664939</td>\n",
       "      <td>no trend</td>\n",
       "      <td>stationary</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>D-16</th>\n",
       "      <td>univariate/NASA-MSL/D-16.train.csv</td>\n",
       "      <td>univariate/NASA-MSL/D-16.test.csv</td>\n",
       "      <td>real</td>\n",
       "      <td>False</td>\n",
       "      <td>NaN</td>\n",
       "      <td>semi-supervised</td>\n",
       "      <td>True</td>\n",
       "      <td>univariate</td>\n",
       "      <td>2191</td>\n",
       "      <td>1</td>\n",
       "      <td>0.297125</td>\n",
       "      <td>1</td>\n",
       "      <td>651</td>\n",
       "      <td>651</td>\n",
       "      <td>651</td>\n",
       "      <td>-0.511282</td>\n",
       "      <td>0.550257</td>\n",
       "      <td>no trend</td>\n",
       "      <td>trend_stationary</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"5\" valign=\"top\">NASA-SMAP</th>\n",
       "      <th>R-1</th>\n",
       "      <td>univariate/NASA-SMAP/R-1.train.csv</td>\n",
       "      <td>univariate/NASA-SMAP/R-1.test.csv</td>\n",
       "      <td>real</td>\n",
       "      <td>False</td>\n",
       "      <td>NaN</td>\n",
       "      <td>semi-supervised</td>\n",
       "      <td>True</td>\n",
       "      <td>univariate</td>\n",
       "      <td>7244</td>\n",
       "      <td>1</td>\n",
       "      <td>0.011182</td>\n",
       "      <td>1</td>\n",
       "      <td>81</td>\n",
       "      <td>81</td>\n",
       "      <td>81</td>\n",
       "      <td>0.999724</td>\n",
       "      <td>0.023499</td>\n",
       "      <td>no trend</td>\n",
       "      <td>difference_stationary</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>S-1</th>\n",
       "      <td>univariate/NASA-SMAP/S-1.train.csv</td>\n",
       "      <td>univariate/NASA-SMAP/S-1.test.csv</td>\n",
       "      <td>real</td>\n",
       "      <td>False</td>\n",
       "      <td>NaN</td>\n",
       "      <td>semi-supervised</td>\n",
       "      <td>True</td>\n",
       "      <td>univariate</td>\n",
       "      <td>7331</td>\n",
       "      <td>1</td>\n",
       "      <td>0.061110</td>\n",
       "      <td>1</td>\n",
       "      <td>448</td>\n",
       "      <td>448</td>\n",
       "      <td>448</td>\n",
       "      <td>-0.156789</td>\n",
       "      <td>0.461390</td>\n",
       "      <td>no trend</td>\n",
       "      <td>stationary</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>T-1</th>\n",
       "      <td>univariate/NASA-SMAP/T-1.train.csv</td>\n",
       "      <td>univariate/NASA-SMAP/T-1.test.csv</td>\n",
       "      <td>real</td>\n",
       "      <td>False</td>\n",
       "      <td>NaN</td>\n",
       "      <td>semi-supervised</td>\n",
       "      <td>True</td>\n",
       "      <td>univariate</td>\n",
       "      <td>8612</td>\n",
       "      <td>1</td>\n",
       "      <td>0.178356</td>\n",
       "      <td>2</td>\n",
       "      <td>36</td>\n",
       "      <td>768</td>\n",
       "      <td>1500</td>\n",
       "      <td>0.485297</td>\n",
       "      <td>0.572614</td>\n",
       "      <td>no trend</td>\n",
       "      <td>difference_stationary</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>T-2</th>\n",
       "      <td>univariate/NASA-SMAP/T-2.train.csv</td>\n",
       "      <td>univariate/NASA-SMAP/T-2.test.csv</td>\n",
       "      <td>real</td>\n",
       "      <td>False</td>\n",
       "      <td>NaN</td>\n",
       "      <td>semi-supervised</td>\n",
       "      <td>True</td>\n",
       "      <td>univariate</td>\n",
       "      <td>8625</td>\n",
       "      <td>1</td>\n",
       "      <td>0.206957</td>\n",
       "      <td>1</td>\n",
       "      <td>1785</td>\n",
       "      <td>1785</td>\n",
       "      <td>1785</td>\n",
       "      <td>0.483530</td>\n",
       "      <td>0.575923</td>\n",
       "      <td>no trend</td>\n",
       "      <td>difference_stationary</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>T-3</th>\n",
       "      <td>univariate/NASA-SMAP/T-3.train.csv</td>\n",
       "      <td>univariate/NASA-SMAP/T-3.test.csv</td>\n",
       "      <td>real</td>\n",
       "      <td>False</td>\n",
       "      <td>NaN</td>\n",
       "      <td>semi-supervised</td>\n",
       "      <td>True</td>\n",
       "      <td>univariate</td>\n",
       "      <td>8579</td>\n",
       "      <td>1</td>\n",
       "      <td>0.021448</td>\n",
       "      <td>2</td>\n",
       "      <td>83</td>\n",
       "      <td>92</td>\n",
       "      <td>101</td>\n",
       "      <td>0.979773</td>\n",
       "      <td>0.032999</td>\n",
       "      <td>no trend</td>\n",
       "      <td>difference_stationary</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>81 rows × 19 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                                                      train_path  \\\n",
       "collection_name dataset_name                                       \n",
       "NASA-MSL        C-1            univariate/NASA-MSL/C-1.train.csv   \n",
       "                C-2            univariate/NASA-MSL/C-2.train.csv   \n",
       "                D-14          univariate/NASA-MSL/D-14.train.csv   \n",
       "                D-15          univariate/NASA-MSL/D-15.train.csv   \n",
       "                D-16          univariate/NASA-MSL/D-16.train.csv   \n",
       "...                                                          ...   \n",
       "NASA-SMAP       R-1           univariate/NASA-SMAP/R-1.train.csv   \n",
       "                S-1           univariate/NASA-SMAP/S-1.train.csv   \n",
       "                T-1           univariate/NASA-SMAP/T-1.train.csv   \n",
       "                T-2           univariate/NASA-SMAP/T-2.train.csv   \n",
       "                T-3           univariate/NASA-SMAP/T-3.train.csv   \n",
       "\n",
       "                                                      test_path dataset_type  \\\n",
       "collection_name dataset_name                                                   \n",
       "NASA-MSL        C-1            univariate/NASA-MSL/C-1.test.csv         real   \n",
       "                C-2            univariate/NASA-MSL/C-2.test.csv         real   \n",
       "                D-14          univariate/NASA-MSL/D-14.test.csv         real   \n",
       "                D-15          univariate/NASA-MSL/D-15.test.csv         real   \n",
       "                D-16          univariate/NASA-MSL/D-16.test.csv         real   \n",
       "...                                                         ...          ...   \n",
       "NASA-SMAP       R-1           univariate/NASA-SMAP/R-1.test.csv         real   \n",
       "                S-1           univariate/NASA-SMAP/S-1.test.csv         real   \n",
       "                T-1           univariate/NASA-SMAP/T-1.test.csv         real   \n",
       "                T-2           univariate/NASA-SMAP/T-2.test.csv         real   \n",
       "                T-3           univariate/NASA-SMAP/T-3.test.csv         real   \n",
       "\n",
       "                              datetime_index  split_at       train_type  \\\n",
       "collection_name dataset_name                                              \n",
       "NASA-MSL        C-1                    False       NaN  semi-supervised   \n",
       "                C-2                    False       NaN  semi-supervised   \n",
       "                D-14                   False       NaN  semi-supervised   \n",
       "                D-15                   False       NaN  semi-supervised   \n",
       "                D-16                   False       NaN  semi-supervised   \n",
       "...                                      ...       ...              ...   \n",
       "NASA-SMAP       R-1                    False       NaN  semi-supervised   \n",
       "                S-1                    False       NaN  semi-supervised   \n",
       "                T-1                    False       NaN  semi-supervised   \n",
       "                T-2                    False       NaN  semi-supervised   \n",
       "                T-3                    False       NaN  semi-supervised   \n",
       "\n",
       "                              train_is_normal  input_type  length  dimensions  \\\n",
       "collection_name dataset_name                                                    \n",
       "NASA-MSL        C-1                      True  univariate    2264           1   \n",
       "                C-2                      True  univariate    2051           1   \n",
       "                D-14                     True  univariate    2625           1   \n",
       "                D-15                     True  univariate    2158           1   \n",
       "                D-16                     True  univariate    2191           1   \n",
       "...                                       ...         ...     ...         ...   \n",
       "NASA-SMAP       R-1                      True  univariate    7244           1   \n",
       "                S-1                      True  univariate    7331           1   \n",
       "                T-1                      True  univariate    8612           1   \n",
       "                T-2                      True  univariate    8625           1   \n",
       "                T-3                      True  univariate    8579           1   \n",
       "\n",
       "                              contamination  num_anomalies  \\\n",
       "collection_name dataset_name                                 \n",
       "NASA-MSL        C-1                0.137809              2   \n",
       "                C-2                0.066797              2   \n",
       "                D-14               0.084571              2   \n",
       "                D-15               0.297034              1   \n",
       "                D-16               0.297125              1   \n",
       "...                                     ...            ...   \n",
       "NASA-SMAP       R-1                0.011182              1   \n",
       "                S-1                0.061110              1   \n",
       "                T-1                0.178356              2   \n",
       "                T-2                0.206957              1   \n",
       "                T-3                0.021448              2   \n",
       "\n",
       "                              min_anomaly_length  median_anomaly_length  \\\n",
       "collection_name dataset_name                                              \n",
       "NASA-MSL        C-1                          111                    156   \n",
       "                C-2                           36                     68   \n",
       "                D-14                          21                    111   \n",
       "                D-15                         641                    641   \n",
       "                D-16                         651                    651   \n",
       "...                                          ...                    ...   \n",
       "NASA-SMAP       R-1                           81                     81   \n",
       "                S-1                          448                    448   \n",
       "                T-1                           36                    768   \n",
       "                T-2                         1785                   1785   \n",
       "                T-3                           83                     92   \n",
       "\n",
       "                              max_anomaly_length      mean    stddev  \\\n",
       "collection_name dataset_name                                           \n",
       "NASA-MSL        C-1                          201 -0.596494  0.422230   \n",
       "                C-2                          101 -0.527869  0.469327   \n",
       "                D-14                         201 -0.859483  0.509250   \n",
       "                D-15                         641  0.767703  0.664939   \n",
       "                D-16                         651 -0.511282  0.550257   \n",
       "...                                          ...       ...       ...   \n",
       "NASA-SMAP       R-1                           81  0.999724  0.023499   \n",
       "                S-1                          448 -0.156789  0.461390   \n",
       "                T-1                         1500  0.485297  0.572614   \n",
       "                T-2                         1785  0.483530  0.575923   \n",
       "                T-3                          101  0.979773  0.032999   \n",
       "\n",
       "                                 trend           stationarity  \n",
       "collection_name dataset_name                                   \n",
       "NASA-MSL        C-1           no trend  difference_stationary  \n",
       "                C-2           no trend  difference_stationary  \n",
       "                D-14          no trend             stationary  \n",
       "                D-15          no trend             stationary  \n",
       "                D-16          no trend       trend_stationary  \n",
       "...                                ...                    ...  \n",
       "NASA-SMAP       R-1           no trend  difference_stationary  \n",
       "                S-1           no trend             stationary  \n",
       "                T-1           no trend  difference_stationary  \n",
       "                T-2           no trend  difference_stationary  \n",
       "                T-3           no trend  difference_stationary  \n",
       "\n",
       "[81 rows x 19 columns]"
      ]
     },
     "execution_count": 33,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "dm.refresh()\n",
    "dm.df().loc[(slice(\"NASA-MSL\", \"NASA-SMAP\"), slice(None))]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Experimentation"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the anomaly label table that ships with the raw data.\n",
    "meta = pd.read_csv(\n",
    "    os.path.join(source_folder, \"labeled_anomalies.csv\")\n",
    ")\n",
    "# Parse the anomaly_sequences entry at index label 0 as JSON. The result is\n",
    "# discarded, so this only confirms that the entry is valid JSON.\n",
    "json.loads(meta[\"anomaly_sequences\"][0])\n",
    "# Keep the first row (a Series) as the sample channel for the cells below.\n",
    "dataset = next(row for _, row in meta.iterrows())\n",
    "print(dataset)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Build a labelled frame (value + is_anomaly) for the test split of the\n",
    "# sample channel held in `dataset`.\n",
    "filename = os.path.join(source_folder, \"test\", dataset[\"chan_id\"] + \".npy\")\n",
    "print(f\"loading {filename}\")\n",
    "dd = np.load(filename)\n",
    "# The first column is named \"value\", the remaining ones C0, C1, ...\n",
    "# The number of extra columns is taken from the array itself so arrays of\n",
    "# any width load (a fixed count of 24 fails for every other width).\n",
    "columns = [\"value\"] + [f\"C{i}\" for i in range(dd.shape[1] - 1)]\n",
    "df = pd.DataFrame(dd, columns=columns)\n",
    "df.index.name = \"timestamp\"\n",
    "# Copy the selection so the label column is added to an independent frame\n",
    "# instead of a slice of the wide one (avoids SettingWithCopyWarning).\n",
    "df = df[[\"value\"]].copy()\n",
    "df[\"is_anomaly\"] = 0\n",
    "\n",
    "windows = json.loads(dataset[\"anomaly_sequences\"])\n",
    "\n",
    "# Each window is unpacked as a (start, end) pair of index labels. Label\n",
    "# slicing with .loc includes both ends, which matches a >= start / <= end\n",
    "# filter, and it writes into the frame directly rather than through the\n",
    "# array returned by .values (which is not guaranteed to be a writable view).\n",
    "for t1, t2 in windows:\n",
    "    df.loc[t1:t2, \"is_anomaly\"] = 1\n",
    "\n",
    "print(windows)\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Run transform_and_label on the train and test arrays of the sample channel\n",
    "# and count the rows flagged as anomalous in each resulting CSV.\n",
    "chan_id = dataset[\"chan_id\"]\n",
    "trainfile = os.path.join(source_folder, \"train\", chan_id + \".npy\")\n",
    "testfile = os.path.join(source_folder, \"test\", chan_id + \".npy\")\n",
    "windows = json.loads(dataset[\"anomaly_sequences\"])\n",
    "\n",
    "# Name the scratch CSVs after the channel that is actually processed, so the\n",
    "# outputs stay consistent with the inputs if `dataset` is a different row.\n",
    "# NOTE(review): both files are written to the current working directory.\n",
    "train_csv = f\"{chan_id}.train.csv\"\n",
    "test_csv = f\"{chan_id}.test.csv\"\n",
    "transform_and_label(trainfile, train_csv, windows, force_all_normal=True)\n",
    "transform_and_label(testfile, test_csv, windows, force_all_normal=False)\n",
    "\n",
    "df = pd.read_csv(train_csv, index_col=\"timestamp\")\n",
    "anomalies_in_train = int((df[\"is_anomaly\"] == 1).sum())\n",
    "df = pd.read_csv(test_csv, index_col=\"timestamp\")\n",
    "anomalies_in_test = int((df[\"is_anomaly\"] == 1).sum())\n",
    "(anomalies_in_train, anomalies_in_test)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>timestamp</th>\n",
       "      <th>value</th>\n",
       "      <th>is_anomaly</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "      <td>2.146646</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>2.146646</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2</td>\n",
       "      <td>2.146646</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>3</td>\n",
       "      <td>2.151326</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>4</td>\n",
       "      <td>2.163807</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2153</th>\n",
       "      <td>2153</td>\n",
       "      <td>-0.996880</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2154</th>\n",
       "      <td>2154</td>\n",
       "      <td>-0.998440</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2155</th>\n",
       "      <td>2155</td>\n",
       "      <td>-0.982839</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2156</th>\n",
       "      <td>2156</td>\n",
       "      <td>-0.976599</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2157</th>\n",
       "      <td>2157</td>\n",
       "      <td>-0.976599</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>2158 rows × 3 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "      timestamp     value  is_anomaly\n",
       "0             0  2.146646           0\n",
       "1             1  2.146646           0\n",
       "2             2  2.146646           0\n",
       "3             3  2.151326           0\n",
       "4             4  2.163807           0\n",
       "...         ...       ...         ...\n",
       "2153       2153 -0.996880           0\n",
       "2154       2154 -0.998440           0\n",
       "2155       2155 -0.982839           0\n",
       "2156       2156 -0.976599           0\n",
       "2157       2157 -0.976599           0\n",
       "\n",
       "[2158 rows x 3 columns]"
      ]
     },
     "execution_count": 35,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from pathlib import Path\n",
    "df = pd.read_csv(Path(target_folder) / \"univariate\" / \"NASA-MSL\" / \"C-1.train.csv\")\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([-0.52315476, -0.68885677, -0.55499272, ..., -0.72575974,\n",
       "       -0.68958026, -0.70984087])"
      ]
     },
     "execution_count": 37,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from pathlib import Path\n",
    "np.load(Path(source_folder) / \"train\" / \"P-1.npy\")[:,0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "timeeval38",
   "language": "python",
   "name": "timeeval38"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.8"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
